package Spark2SQL

import org.apache.spark.sql.{Dataset, Row, SparkSession}

/**
  * Created by Administrator on 2018/5/28.
  */
object Spark2DataSetWordCount {
  def main(args: Array[String]): Unit = {
    //创建SparkSession
    val spark: SparkSession = SparkSession.builder()
      .appName("Spark2DataSetWordCount")
      .master("local[*]")
      .getOrCreate()

    //指定从哪里读数据
    val lines: Dataset[String] = spark.read.textFile("D:\\SparkTestData\\data.txt")

    //切分压平
    //导入隐式转换
    import spark.implicits._
    val words: Dataset[String] = lines.flatMap(_.split(" "))

    //使用DataSet的API
//    val counts: DataFrame = words.groupBy($"value" as "word").count()
//    counts.show()

    //导入聚合函数
    import org.apache.spark.sql.functions._
    val counts: Dataset[Row] = words.groupBy($"value".as("word")).agg(count("*") as "counts").orderBy($"counts" desc)
    counts.show()

    //释放资源
    spark.stop()
  }
}
